In [1]:
import msaf
import pandas as pd
# Show floats with 4 decimals in rendered tables. The fully-qualified option
# name is required: the short pattern 'precision' is ambiguous on recent pandas
# versions (it also matches 'styler.format.precision') and raises OptionError.
pd.set_option('display.precision', 4)
import numpy as np
import musicntd.scripts.overall_scripts as scr
import musicntd.data_manipulation as dm

Segmentation results of the baseline methods

This notebook presents the segmentation results of the baseline methods, computed with MSAF.

We restricted the baseline to three algorithms:

  • CNMF [1],
  • Foote's novelty [2],
  • Spectral Clustering [3].
In [2]:
# MSAF boundary-algorithm identifiers of the baselines evaluated in this notebook.
desired_algos = [
    "cnmf",      # CNMF [1]
    "foote",     # Foote's novelty [2]
    "scluster",  # Spectral Clustering [3]
]
In [4]:
# Folder holding the RWC Pop audio files (machine-specific absolute path).
folder = "C:\\Users\\amarmore\\Desktop\\Audio samples\\RWC Pop\\Entire RWC"

# Annotation set used as reference; it also names the annotation sub-folder.
annotations_type = "MIREX10"
annotations_folder = "C:\\Users\\amarmore\\Desktop\\Audio samples\\RWC Pop\\annotations\\" + annotations_type

Below is the code to compute these scores.

In [3]:
def parse_all_algos(song_path, references_segments, bars):
    """
    Segment one song with every baseline algorithm and evaluate each estimate.

    For each algorithm identifier in the notebook-level ``desired_algos``,
    boundaries are estimated with ``msaf.process`` and converted to segments
    with ``dm.frontiers_to_segments``. The segments are then evaluated against
    ``references_segments`` twice: once as estimated, and once after going
    through ``dm.align_segments_on_bars`` with ``bars``. Every evaluation is
    made with two tolerance windows, 0.5 and 3 (labelled as seconds in the
    results table of this notebook).

    Parameters
    ----------
    song_path
        Path of the audio file, handed to ``msaf.process``.
    references_segments
        Reference segmentation, forwarded to the ``dm`` evaluation helpers.
    bars
        Bars of the song, forwarded to ``dm.align_segments_on_bars``.

    Returns
    -------
    tuple of four lists
        ``(scores at 0.5, scores at 3, rates at 0.5, rates at 3)``. Each list
        holds two consecutive entries per algorithm, following the order of
        ``desired_algos``: first the original estimate, then the aligned one.
    """
    scores_half_second, scores_three_seconds = [], []
    rates_half_second, rates_three_seconds = [], []

    def evaluate(estimated_segments):
        # Scores first (0.5 then 3), then rates (0.5 then 3).
        for window, collected in ((0.5, scores_half_second), (3, scores_three_seconds)):
            collected.append(dm.compute_score_of_segmentation(
                references_segments, estimated_segments, window_length=window))
        for window, collected in ((0.5, rates_half_second), (3, rates_three_seconds)):
            collected.append(dm.compute_rates_of_segmentation(
                references_segments, estimated_segments, window_length=window))

    for algo in desired_algos:
        boundaries, _ = msaf.process(song_path, boundaries_id=algo)
        estimated = np.array(dm.frontiers_to_segments(boundaries))

        # The original estimate is fully evaluated before the alignment is computed.
        evaluate(estimated)
        evaluate(dm.align_segments_on_bars(estimated, bars))

    return scores_half_second, scores_three_seconds, rates_half_second, rates_three_seconds
In [1]:
# Script which parses all songs of RWC, computes their frontiers with every
# baseline algorithm, and then stores the resulting scores and rates per song.
zero_point_five_results, three_seconds_results = [], []
five_rates_results, three_rates_results = [], []

# Folder handed to scr.load_or_save_bars together with each song path.
persisted_path = "C:\\Users\\amarmore\\Desktop\\data_persisted\\"
paths = scr.load_RWC_dataset(folder, annotations_type = annotations_type)

for song_and_annotations in paths:
    # First entry: audio file name; second entry: annotation file name.
    song_name = song_and_annotations[0]
    song_path = folder + "\\" + song_name
    print(song_name)

    annot_path = "{}\\{}".format(annotations_folder, song_and_annotations[1])
    annotations = dm.get_segmentation_from_txt(annot_path, annotations_type)
    # Only the first two columns of the annotations are used as reference segments.
    references_segments = np.array(annotations)[:, :2]

    bars = scr.load_or_save_bars(persisted_path, song_path)
    song_results = parse_all_algos(song_path, references_segments, bars)
    this_zero, this_three, five_rates, three_rates = song_results

    zero_point_five_results.append(this_zero)
    three_seconds_results.append(this_three)

    five_rates_results.append(five_rates)
    three_rates_results.append(three_rates)

Finally, we display the scores obtained by the baseline methods in a summary table.

In [6]:
# Stack the per-song results: axis 0 runs over songs, axis 1 over the
# (algorithm, variant) rows, axis 2 over the three score values.
zerofive = np.array(zero_point_five_results)
three = np.array(three_seconds_results)

all_algos = list(desired_algos)
params = ['Original', 'Aligned on downbeats']

# Row labels: one row per (algorithm, variant) pair, algorithm-major, which is
# the order in which the results were appended for each song.
line = [algo for algo in all_algos for _ in params]
subline = [variant for _ in all_algos for variant in params]
nested_lines = [np.array(line), np.array(subline)]

# Column labels: tolerance window on the first level, metric on the second.
col = [np.array(['0.5 seconds'] * 3 + ['3 seconds'] * 3),
       np.array(['Precision', 'Recall', 'F measure'] * 2)]

# Average every score value over the songs, for both tolerance windows.
arr = [
    [np.mean(results[:, row, metric]) for results in (zerofive, three) for metric in range(3)]
    for row in range(len(line))
]

pd.DataFrame(np.array(arr), index=nested_lines, columns=col)
Out[6]:
0.5 seconds 3 seconds
Precision Recall F measure Precision Recall F measure
cnmf Original 0.2284 0.2146 0.2152 0.4676 0.4517 0.4469
Aligned on downbeats 0.3157 0.2811 0.2881 0.5068 0.4537 0.4653
foote Original 0.2965 0.2230 0.2514 0.6389 0.4859 0.5449
Aligned on downbeats 0.4203 0.2995 0.3448 0.6706 0.4766 0.5501
scluster Original 0.3123 0.3045 0.2944 0.6065 0.6084 0.5812
Aligned on downbeats 0.4921 0.4503 0.4501 0.6554 0.6056 0.6030

References

[1] Nieto, O., & Jehan, T. (2013, May). Convex non-negative matrix factorization for automatic music structure identification. In 2013 IEEE International Conference on Acoustics, Speech and Signal Processing (pp. 236-240). IEEE.

[2] Foote, J. (2000, July). Automatic audio segmentation using a measure of audio novelty. In 2000 IEEE International Conference on Multimedia and Expo. ICME2000. Proceedings. Latest Advances in the Fast Changing World of Multimedia (Cat. No. 00TH8532) (Vol. 1, pp. 452-455). IEEE.

[3] McFee, B., & Ellis, D. (2014). Analyzing Song Structure with Spectral Clustering. In ISMIR (pp. 405-410).